{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "电阻率线性回归.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true,
      "authorship_tag": "ABX9TyNVCB4vg+MHPXntd6fKOv/O",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/sunyingjian/-Logging-related-network/blob/master/%E7%94%B5%E9%98%BB%E7%8E%87%E7%BA%BF%E6%80%A7%E5%9B%9E%E5%BD%92.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "3Z8cK8h2Avbv",
        "colab_type": "text"
      },
      "source": [
        "<code>function ClickConnect(){\n",
        "console.log(\"Working\"); \n",
        "document.querySelector(\"colab-connect-button\").click() \n",
        "}setInterval(ClickConnect,6000)</code>"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jtClEMAMLVHw",
        "colab_type": "code",
        "cellView": "both",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "ccbfb81f-34bf-4216-ed84-105f376f62ef"
      },
      "source": [
        "#@markdown <h3>← 输入了代码后运行以防止断开</h3>\n",
        "\n",
        "\n",
        "import IPython\n",
        "from google.colab import output\n",
        "\n",
        "display(IPython.display.Javascript('''\n",
        " function ClickConnect(){\n",
        "   btn = document.querySelector(\"colab-connect-button\")\n",
        "   if (btn != null){\n",
        "     console.log(\"Click colab-connect-button\"); \n",
        "     btn.click() \n",
        "     }\n",
        "   \n",
        "   btn = document.getElementById('ok')\n",
        "   if (btn != null){\n",
        "     console.log(\"Click reconnect\"); \n",
        "     btn.click() \n",
        "     }\n",
        "  }\n",
        "  \n",
        "setInterval(ClickConnect,60000)\n",
        "'''))\n",
        "\n",
        "print(\"Done.\")"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/javascript": [
              "\n",
              " function ClickConnect(){\n",
              "   btn = document.querySelector(\"colab-connect-button\")\n",
              "   if (btn != null){\n",
              "     console.log(\"Click colab-connect-button\"); \n",
              "     btn.click() \n",
              "     }\n",
              "   \n",
              "   btn = document.getElementById('ok')\n",
              "   if (btn != null){\n",
              "     console.log(\"Click reconnect\"); \n",
              "     btn.click() \n",
              "     }\n",
              "  }\n",
              "  \n",
              "setInterval(ClickConnect,60000)\n"
            ],
            "text/plain": [
              "<IPython.core.display.Javascript object>"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "Done.\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "TEjuU_5vCALS",
        "colab_type": "text"
      },
      "source": [
        "## 准备数据"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "qhSnYTudB-CG",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 155
        },
        "outputId": "89da57c7-34b3-493f-b9c7-355952a13143"
      },
      "source": [
        "! git clone https://github.com/sunyingjian/numpy-.git\n",
        "!git clone https://github.com/RRdmlearning/Machine-Learning-From-Scratch.git\n",
        "!git clone https://github.com/Jack-Cherish/Machine-Learning.git\n",
        "! git clone https://github.com/seg/tutorials-2016.git"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "fatal: destination path 'numpy-' already exists and is not an empty directory.\n",
            "fatal: destination path 'Machine-Learning-From-Scratch' already exists and is not an empty directory.\n",
            "fatal: destination path 'Machine-Learning' already exists and is not an empty directory.\n",
            "Cloning into 'tutorials-2016'...\n",
            "remote: Enumerating objects: 161, done.\u001b[K\n",
            "remote: Total 161 (delta 0), reused 0 (delta 0), pack-reused 161\u001b[K\n",
            "Receiving objects: 100% (161/161), 16.86 MiB | 7.19 MiB/s, done.\n",
            "Resolving deltas: 100% (64/64), done.\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "SWdKrn4UCGgE",
        "colab_type": "text"
      },
      "source": [
        "## 导入谷歌云端硬盘"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jnWES-5WCFoh",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 124
        },
        "outputId": "c3d8d61d-8849-4f25-b4d5-671af9d86026"
      },
      "source": [
        "import os\n",
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly\n",
            "\n",
            "Enter your authorization code:\n",
            "··········\n",
            "Mounted at /content/drive\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "jHKIpBhzEOYn",
        "colab_type": "text"
      },
      "source": [
        "## 开始"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "jh8G9E8nENCX",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 407
        },
        "outputId": "6a132f2c-3edb-4b06-b830-6a7b9a65b5df"
      },
      "source": [
        "%matplotlib inline\n",
        "#%matplotlib inline 可以在Ipython编译器里直接使用，功能是可以内嵌绘图，并且可以省略掉plt.show()这一步。\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "import matplotlib as mpl\n",
        "import matplotlib.pyplot as plt\n",
        "import matplotlib.colors as colors\n",
        "from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
        "from pandas import set_option\n",
        "import seaborn as sns\n",
        "from sklearn import  model_selection\n",
        "import statsmodels.api as sm\n",
        "import statsmodels.formula.api as smf\n",
        "set_option(\"display.max_rows\", 10)#设置要显示的默认行数，显示的最大行数是10\n",
        "pd.options.mode.chained_assignment = None #为了在增加列表行数的时候防止出现setting with copy warning\n",
        "training_data = pd.read_csv('/content/numpy-/3345train data.csv')\n",
        "training_data\n",
        "testing_data = pd.read_csv('/content/numpy-/3345test_data.csv')\n",
        "all_data = pd.read_csv('/content/numpy-/延安油田总数居.csv')\n",
        "all_data"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>AC</th>\n",
              "      <th>CAL</th>\n",
              "      <th>GR</th>\n",
              "      <th>K</th>\n",
              "      <th>RD</th>\n",
              "      <th>SP</th>\n",
              "      <th>Core Lithology</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0.029102</td>\n",
              "      <td>0.031789</td>\n",
              "      <td>0.026724</td>\n",
              "      <td>0.202335</td>\n",
              "      <td>0.078986</td>\n",
              "      <td>0.333754</td>\n",
              "      <td>5</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>0.032883</td>\n",
              "      <td>0.033469</td>\n",
              "      <td>0.030592</td>\n",
              "      <td>0.203141</td>\n",
              "      <td>0.076064</td>\n",
              "      <td>0.333669</td>\n",
              "      <td>5</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>0.034385</td>\n",
              "      <td>0.037006</td>\n",
              "      <td>0.032359</td>\n",
              "      <td>0.200121</td>\n",
              "      <td>0.074503</td>\n",
              "      <td>0.333619</td>\n",
              "      <td>5</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>0.243816</td>\n",
              "      <td>0.143381</td>\n",
              "      <td>0.086581</td>\n",
              "      <td>0.290115</td>\n",
              "      <td>0.017649</td>\n",
              "      <td>0.094025</td>\n",
              "      <td>3</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>0.248043</td>\n",
              "      <td>0.146786</td>\n",
              "      <td>0.084643</td>\n",
              "      <td>0.283874</td>\n",
              "      <td>0.017286</td>\n",
              "      <td>0.090554</td>\n",
              "      <td>3</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>...</th>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "      <td>...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3706</th>\n",
              "      <td>0.016947</td>\n",
              "      <td>0.138827</td>\n",
              "      <td>0.118400</td>\n",
              "      <td>0.358567</td>\n",
              "      <td>0.281687</td>\n",
              "      <td>0.302936</td>\n",
              "      <td>6</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3707</th>\n",
              "      <td>0.131043</td>\n",
              "      <td>0.069016</td>\n",
              "      <td>0.029354</td>\n",
              "      <td>0.060197</td>\n",
              "      <td>0.282093</td>\n",
              "      <td>0.253809</td>\n",
              "      <td>3</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3708</th>\n",
              "      <td>0.036182</td>\n",
              "      <td>0.032010</td>\n",
              "      <td>0.030090</td>\n",
              "      <td>0.071069</td>\n",
              "      <td>0.359737</td>\n",
              "      <td>0.435486</td>\n",
              "      <td>5</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3709</th>\n",
              "      <td>0.014562</td>\n",
              "      <td>0.127332</td>\n",
              "      <td>0.035014</td>\n",
              "      <td>0.178579</td>\n",
              "      <td>0.803566</td>\n",
              "      <td>0.311641</td>\n",
              "      <td>6</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3710</th>\n",
              "      <td>0.014427</td>\n",
              "      <td>0.134185</td>\n",
              "      <td>0.033460</td>\n",
              "      <td>0.186430</td>\n",
              "      <td>0.891134</td>\n",
              "      <td>0.311652</td>\n",
              "      <td>6</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>3711 rows × 7 columns</p>\n",
              "</div>"
            ],
            "text/plain": [
              "            AC       CAL        GR  ...        RD        SP  Core Lithology\n",
              "0     0.029102  0.031789  0.026724  ...  0.078986  0.333754               5\n",
              "1     0.032883  0.033469  0.030592  ...  0.076064  0.333669               5\n",
              "2     0.034385  0.037006  0.032359  ...  0.074503  0.333619               5\n",
              "3     0.243816  0.143381  0.086581  ...  0.017649  0.094025               3\n",
              "4     0.248043  0.146786  0.084643  ...  0.017286  0.090554               3\n",
              "...        ...       ...       ...  ...       ...       ...             ...\n",
              "3706  0.016947  0.138827  0.118400  ...  0.281687  0.302936               6\n",
              "3707  0.131043  0.069016  0.029354  ...  0.282093  0.253809               3\n",
              "3708  0.036182  0.032010  0.030090  ...  0.359737  0.435486               5\n",
              "3709  0.014562  0.127332  0.035014  ...  0.803566  0.311641               6\n",
              "3710  0.014427  0.134185  0.033460  ...  0.891134  0.311652               6\n",
              "\n",
              "[3711 rows x 7 columns]"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 50
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "wZCBXh6FER5V",
        "colab_type": "text"
      },
      "source": [
        "## 根据数据集建模"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "6myJMgn-EUJj",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 155
        },
        "outputId": "fd8cce8e-2120-4f68-a14c-82c9ccbe2848"
      },
      "source": [
        "model = smf.ols(formula='RD~AC+CAL+GR+K+SP',data=training_data).fit()\n",
        "print(f'模型的偏回归系数为{model.params}\\n')"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "模型的偏回归系数为Intercept    0.137135\n",
            "AC          -0.197023\n",
            "CAL          0.003628\n",
            "GR           0.005497\n",
            "K           -0.127530\n",
            "SP          -0.008810\n",
            "dtype: float64\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "_cGqxIzy6ylo",
        "colab_type": "text"
      },
      "source": [
        "## 针对测试集进行处理"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UJmwmK6SHJlq",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 276
        },
        "outputId": "5b10a53c-f03e-4158-8e11-f6802963486a"
      },
      "source": [
        "#删除要预测的RD变量\n",
        "test_data = testing_data.drop(['Core Lithology'], axis=1)\n",
        "test_data = test_data.drop(['RD'],axis=1)\n",
        "pred = model.predict(exog=test_data)\n",
        "print('对比预测值和实际值的差异:\\n',pd.DataFrame({'Prediction':pred,'Real':testing_data.RD}))"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "对比预测值和实际值的差异:\n",
            "      Prediction      Real\n",
            "0      0.110352  0.001420\n",
            "1      0.041023  0.002744\n",
            "2     -0.003096  0.003874\n",
            "3      0.037381  0.003290\n",
            "4      0.040767  0.003455\n",
            "..          ...       ...\n",
            "361    0.086554  0.281687\n",
            "362    0.101816  0.282093\n",
            "363    0.117388  0.359737\n",
            "364    0.109401  0.803566\n",
            "365    0.108443  0.891134\n",
            "\n",
            "[366 rows x 2 columns]\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "YEvUeBRnHViP",
        "colab_type": "text"
      },
      "source": [
        "## 回归模型的假设检验\n",
        "\n",
        "我们要证明针对于电阻率的预测确实可以使用多元线性回归进行预测"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "LY9Uf0NHHc_N",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "6b57b49d-baef-4893-bbd3-098711a97629"
      },
      "source": [
        "#计算建模中的因变量均值\n",
        "ybar = training_data.RD.mean()\n",
        "ybar"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "0.04917119512705526"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 54
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2Xiqhr4RDJi8",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "81c3465f-0822-4ded-8943-af892d02bba9"
      },
      "source": [
        "#统计变量个数与观测个数\n",
        "p=model.df_model\n",
        "n=training_data.shape[0]\n",
        "p"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "5.0"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 56
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "26APPFhBDfD6",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "6ee5a781-80e2-4c4b-edb2-db87dc5f714b"
      },
      "source": [
        "##计算回归离差平方和（即回归平方和 SSR；注意：变量名 RSS 按统计学惯例通常指残差平方和）\n",
        "RSS=np.sum((model.fittedvalues-ybar)**2)\n",
        "RSS"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "4.392695973369205"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 57
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "Jl-I1J8JDviY",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "16939f41-f526-48b9-ee23-f9f790dd8335"
      },
      "source": [
        "#计算误差平方和（即残差平方和 SSE）\n",
        "ESS = np.sum(model.resid**2)\n",
        "ESS"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "21.028772415264456"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 58
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "S1p3bNNBFIsO",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "ba022b83-e44d-4930-eef2-e9c51c222484"
      },
      "source": [
        "#计算F值\n",
        "F = (RSS/p)/(ESS/(n-p-1))\n",
        "F"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "139.49660555965764"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 59
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "EVw1VQroFWEt",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "ae72bdd1-a568-482d-8d4d-83154e37cd9d"
      },
      "source": [
        "#一行代码解决求f\n",
        "f= model.fvalue\n",
        "f"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "139.49660555965656"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 60
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "nUrDBrg4FrKx",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 35
        },
        "outputId": "e46ddb3e-ef8a-46fd-d38c-998be52d4f43"
      },
      "source": [
        "#计算F值的理论值（F分布0.95分位数，即临界值）\n",
        "from scipy.stats import f\n",
        "F_Theory = f.ppf(q=0.95, dfn=p, dfd = n-p-1)\n",
        "F_Theory"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "2.2167769775463118"
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 62
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "7NkzZFSjG4yH",
        "colab_type": "text"
      },
      "source": [
        "## 到底哪些变量可以影响电阻率建模呢\n",
        "\n",
        "模型的概览信息包含三个部分，第一部分主要是有关模型的信息，例如模型的判决系数R2，用来衡量自变量对因变量的解释程度，模型的F统计值，用来检验模型的显著性；第二部分主要包含偏回归系数的信息，例如回归系数的Coef、t统计量值、回归系数的置信区间等；第三部分主要涉及模型的误差项e的有关信息。\n",
        "\n",
        "在第二部分的内容中，含有每个偏回归系数的t统计量值，它的计算就是由估计值coef和标准差std err的商所得的，同时也有t统计量值对应的概率值p，用来判别统计量是否显著的直接办法，通常概率值p小于0.05时表示拒绝原假设。\n",
        "\n",
        "所以从下方摘要可以发现，真正对电阻率RD回归有显著影响的变量是AC和K（p值约为0.000，小于0.05），而CAL、GR、SP的p值分别为0.762、0.791、0.220，均大于0.05，不显著"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4imidxFNGSqn",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 527
        },
        "outputId": "9d724c80-4fc4-458c-a9f2-4c7f3e293c31"
      },
      "source": [
        "model.summary()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "<table class=\"simpletable\">\n",
              "<caption>OLS Regression Results</caption>\n",
              "<tr>\n",
              "  <th>Dep. Variable:</th>           <td>RD</td>        <th>  R-squared:         </th> <td>   0.173</td> \n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th> <td>   0.172</td> \n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th> <td>   139.5</td> \n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Date:</th>             <td>Sun, 05 Jul 2020</td> <th>  Prob (F-statistic):</th> <td>1.06e-134</td>\n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Time:</th>                 <td>08:30:46</td>     <th>  Log-Likelihood:    </th> <td>  3732.1</td> \n",
              "</tr>\n",
              "<tr>\n",
              "  <th>No. Observations:</th>      <td>  3345</td>      <th>  AIC:               </th> <td>  -7452.</td> \n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Df Residuals:</th>          <td>  3339</td>      <th>  BIC:               </th> <td>  -7416.</td> \n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Df Model:</th>              <td>     5</td>      <th>                     </th>     <td> </td>    \n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>     <td> </td>    \n",
              "</tr>\n",
              "</table>\n",
              "<table class=\"simpletable\">\n",
              "<tr>\n",
              "      <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th>  <th>[0.025</th>    <th>0.975]</th>  \n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Intercept</th> <td>    0.1371</td> <td>    0.004</td> <td>   31.218</td> <td> 0.000</td> <td>    0.129</td> <td>    0.146</td>\n",
              "</tr>\n",
              "<tr>\n",
              "  <th>AC</th>        <td>   -0.1970</td> <td>    0.016</td> <td>  -12.626</td> <td> 0.000</td> <td>   -0.228</td> <td>   -0.166</td>\n",
              "</tr>\n",
              "<tr>\n",
              "  <th>CAL</th>       <td>    0.0036</td> <td>    0.012</td> <td>    0.303</td> <td> 0.762</td> <td>   -0.020</td> <td>    0.027</td>\n",
              "</tr>\n",
              "<tr>\n",
              "  <th>GR</th>        <td>    0.0055</td> <td>    0.021</td> <td>    0.264</td> <td> 0.791</td> <td>   -0.035</td> <td>    0.046</td>\n",
              "</tr>\n",
              "<tr>\n",
              "  <th>K</th>         <td>   -0.1275</td> <td>    0.012</td> <td>  -10.676</td> <td> 0.000</td> <td>   -0.151</td> <td>   -0.104</td>\n",
              "</tr>\n",
              "<tr>\n",
              "  <th>SP</th>        <td>   -0.0088</td> <td>    0.007</td> <td>   -1.226</td> <td> 0.220</td> <td>   -0.023</td> <td>    0.005</td>\n",
              "</tr>\n",
              "</table>\n",
              "<table class=\"simpletable\">\n",
              "<tr>\n",
              "  <th>Omnibus:</th>       <td>3617.046</td> <th>  Durbin-Watson:     </th>  <td>   1.900</td> \n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Prob(Omnibus):</th>  <td> 0.000</td>  <th>  Jarque-Bera (JB):  </th> <td>258858.581</td>\n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Skew:</th>           <td> 5.494</td>  <th>  Prob(JB):          </th>  <td>    0.00</td> \n",
              "</tr>\n",
              "<tr>\n",
              "  <th>Kurtosis:</th>       <td>44.672</td>  <th>  Cond. No.          </th>  <td>    19.7</td> \n",
              "</tr>\n",
              "</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
            ],
            "text/plain": [
              "<class 'statsmodels.iolib.summary.Summary'>\n",
              "\"\"\"\n",
              "                            OLS Regression Results                            \n",
              "==============================================================================\n",
              "Dep. Variable:                     RD   R-squared:                       0.173\n",
              "Model:                            OLS   Adj. R-squared:                  0.172\n",
              "Method:                 Least Squares   F-statistic:                     139.5\n",
              "Date:                Sun, 05 Jul 2020   Prob (F-statistic):          1.06e-134\n",
              "Time:                        08:30:46   Log-Likelihood:                 3732.1\n",
              "No. Observations:                3345   AIC:                            -7452.\n",
              "Df Residuals:                    3339   BIC:                            -7416.\n",
              "Df Model:                           5                                         \n",
              "Covariance Type:            nonrobust                                         \n",
              "==============================================================================\n",
              "                 coef    std err          t      P>|t|      [0.025      0.975]\n",
              "------------------------------------------------------------------------------\n",
              "Intercept      0.1371      0.004     31.218      0.000       0.129       0.146\n",
              "AC            -0.1970      0.016    -12.626      0.000      -0.228      -0.166\n",
              "CAL            0.0036      0.012      0.303      0.762      -0.020       0.027\n",
              "GR             0.0055      0.021      0.264      0.791      -0.035       0.046\n",
              "K             -0.1275      0.012    -10.676      0.000      -0.151      -0.104\n",
              "SP            -0.0088      0.007     -1.226      0.220      -0.023       0.005\n",
              "==============================================================================\n",
              "Omnibus:                     3617.046   Durbin-Watson:                   1.900\n",
              "Prob(Omnibus):                  0.000   Jarque-Bera (JB):           258858.581\n",
              "Skew:                           5.494   Prob(JB):                         0.00\n",
              "Kurtosis:                      44.672   Cond. No.                         19.7\n",
              "==============================================================================\n",
              "\n",
              "Warnings:\n",
              "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
              "\"\"\""
            ]
          },
          "metadata": {
            "tags": []
          },
          "execution_count": 63
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "d9gZw7rJIL4S",
        "colab_type": "text"
      },
      "source": [
        "## 回归诊断\n",
        "\n",
        "当回归模型建好之后，并不意味着建模过程的结束，还需要进一步对模型进行诊断。由统计学知识可知，线性回归模型需要满足一些假设前提，只有满足了这些假设，模型才是合理的。需满足：误差e服从正态分布，无多重共线性，线性相关性，误差项e的独立性，方差齐性。\n",
        "\n",
        "正态性检验，由y=Xβ+e来说，等式右边的自变量属于已知变量，而等式左边的因变量服从正态分布，要求残差项服从正态分布，但其实质就是要求因变量服从正态分布。关于正态性检验通常运用两类方法，分别是定性的图形法（直方图、PP图或QQ图）和定量的非参数法（Shapiro检验和K-S检验），以下是直方图法："
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "hsBdP6DGJ3BQ",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 279
        },
        "outputId": "5cd1e00b-0346-412b-f7d4-445194603e23"
      },
      "source": [
        "import scipy.stats as st\n",
        "sns.distplot(a=testing_data.RD,bins=1,fit=st.norm,norm_hist=True,\n",
        "             hist_kws={'color':'steelblue','edgecolor':'black'},\n",
        "             kde_kws = {'color':'black','linestyle':'--','label':'Kernel density curve'},\n",
        "             fit_kws = {'color':'red','linestyle':':','label':'Normal density curve'})\n",
        "plt.legend()\n",
        "plt.show()"
      ],
      "execution_count": 74,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXMAAAEGCAYAAACXVXXgAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3dd3hU1fbw8e9OSIA0ShJiMEgooacQQpMuoihFQYqIIl2xUq7Cq/wAUa/lIraLBUVRREFRmoBeRXpTogFpQkDAIC2hhPS23z8mGRPSZiaTTCZnfZ6Hhzllzllnkqzs7LP3OkprjRBCCOfm4ugAhBBClJ0kcyGEqAIkmQshRBUgyVwIIaoASeZCCFEFVKvIk/n5+eng4OCKPKUQQji96OjoeK21f0n7VGgyDw4OZu/evRV5SiGEcHpKqVOl7SPdLEIIUQVIMhdCiCpAkrkQQlQBFdpnLkRVlpmZSVxcHGlpaY4ORTipGjVqEBQUhJubm9XvlWQuhJ3ExcXh7e1NcHAwSilHhyOcjNaahIQE4uLiaNSokdXvl24WIewkLS0NX19fSeTCJkopfH19bf7LTpK5EHYkiVyURVm+fySZCyFEFSDJPNemTZtQSnHs2DFHhyKEzZRSTJs2zbw8b9485syZY9OxRo8ezYoVK+wUWfF69uxp18mEf//9N0OGDAEgJiaG9evX2+3YlZkk81zffPMNAD/++KODIxHCdtWrV+ebb74hPj7e0aGYZWVlVej56tevb/4lVNHJvKKvNT9J5rkefvhhAOrWrevgSISwXbVq1Zg4cSKvv/56oW3Xt7S9vLzMr1955RVCQ0MJDw9nxowZhd4bHR1Njx49aNeuHbfffjtnz54F4IMPPqB9+/aEh4dzzz33kJKSYj7Xww8/TMeOHXn66acLHCs1NZV7772Xli1bMmjQIFJTU83b/ve//9G5c2ciIyMZOnQoSUlJgKkUyOzZs4mMjCQ0NJQjR44AsGXLFiIiIoiIiKBt27Zcu3aNkydP0qZNGzIyMpg1axbLly8nIiKC5cuXExISwsWLFwHIycmhadOm5uU8SUlJjBkzhtDQUMLCwvj6668LfV4rVqxg9OjRRV5rcHAwV65cMe8bEhLC+fPnuXjxIvfccw/t27enffv27Nixo8ivoa1kaGKuevXqAXDhwgUHRyKqip49exZaN2zYMB555BFSUlK48847C20fPXo0o0ePJj4+3txVkGfz5s0WnffRRx8lLCysUBItzoYNG1i9ejV79uzBw8ODS5cuFdiemZnJ448/zurVq/H392f58uU8++yzfPTRRwwePJgJEyYAMHPmTBYtWsTjjz8OmIZq7ty5E1dX1wLHe/fdd/Hw8ODw4cPs37+fyMhIAOLj43nhhRf48ccf8fT05JVXXmH+/PnMmjULAD8/P3799Vfeeecd5s2bx4cffsi8efNYsGABXbp0ISkpiRo1apjP4+7uzty5c9m7dy///e9/AThy5AhLly5l8uTJ/Pjjj4SHh+PvX7B+1fPPP0+tWrX4/fffAbh8+XKpn2H+a83OzmblypWMGTOGPXv20LBhQwICArjvvvuYMmUKXbt25fTp09x+++0cPnzYoq+RJSSZ5/rpp58Ay75wQlRmPj4+jBo1irfeeouaNWuWuv+PP/7ImDFj8PDwAAr/dfrHH39w4MAB+vTpA0B2djaBgYEAHDhwgJkzZ3LlyhWSkpK4/fbbze8bOnRooUQOsHXrVp544gkAwsLCCAsLA2D37t0cOnSILl26AJCRkUHnzp3N7xs8eDAA7dq1M3eLdunShalTpzJy5EgGDx5MUFBQidc6duxY7rrrLiZPnsxHH33EmDFjivw8li1bZl6uU6dOice8/lqHDx/O3LlzGTNmDMuWLWP48OHm4x46dMj8nsTERJKSkgq0+MtCknmu3377DcDcChCirEpqSXt4eJS43c
/Pz+KWeFEmT55MZGRkgWRVrVo1cnJyAFMXQ0ZGhkXH0lrTunVrdu3aVWjb6NGjWbVqFeHh4SxevLhAzJ6enlbFrLWmT58+fPHFF0Vur169OgCurq7mvukZM2bQr18/1q9fT5cuXfj+++8LtM6v16BBAwICAvjpp5/4+eefWbp0qcXx5R82eP1Y8PzX2rlzZ2JjY7l48SKrVq1i5syZgOkz3717d4nxlYX0medKSkrC19fX0WEIYRd169Zl2LBhLFq0yLwuODiY6OhoANasWUNmZiYAffr04eOPPzb3d1/fzdK8eXMuXrxoTuaZmZkcPHgQgGvXrhEYGEhmZqbFibF79+58/vnngKllv3//fgA6derEjh07iI2NBSA5OZmjR4+WeKzjx48TGhrK9OnTad++vbkvPY+3tzfXrl0rsG78+PHcf//9xf7l0KdPHxYsWGBezvtrPSAggMOHD5OTk8PKlSuLjUkpxaBBg5g6dSotW7Y055XbbruNt99+27xfTExMiddmLUnmuZKSkkhISOC5555zdChC2MW0adMKjGqZMGECW7ZsITw8nF27dplbk3379mXgwIFERUURERHBvHnzChzH3d2dFStWMH36dMLDw4mIiGDnzp2AqX+5Y8eOdOnShRYtWlgU16RJk0hKSqJly5bMmjWLdu3aAeDv78/ixYsZMWIEYWFhdO7cuVByvt4bb7xBmzZtCAsLw83NjTvuuKPA9l69enHo0CHzDVCAgQMHmm9yFmXmzJlcvnyZNm3aEB4ezqZNmwB4+eWX6d+/PzfffLO5m6k4w4cP57PPPjN3sQC89dZb7N27l7CwMFq1asV7771X8gdlJaW1tusBSxIVFaUr68MphgwZwtdff03z5s1L/QYSoiiHDx+mZcuWjg5DlGLv3r1MmTKFbdu2OTqUIhX1faSUitZaR5X0Pukzz5XXSpHRLEJUXS+//DLvvvuuVX3lzkK6WXJ98sknPPfcc1y+fNnclyiEqFpmzJjBqVOn6Nq1q6NDsTtJ5vnkjTetTLPnhBDCEtLNkmvs2LHExsbi6+vL1atXS73BIYQQlUmpLXOlVAOl1Cal1CGl1EGl1JO56+sqpX5QSh3L/b/0kfWV2HfffUeLFi2Ij4+3+K68EEJUFpZ0s2QB07TWrYBOwKNKqVbADGCj1joE2Ji77LTsORNLCCEqWqnJXGt9Vmv9a+7ra8Bh4EbgLuCT3N0+Ae4uryDLm9aapKQkXFxcGDJkSIWU/RSiPLi6uhIREUGbNm0YOnSoeSKQpUoqH7tmzRpefvllu8YLpslM9rxPtXfvXnO5gM2bN5vHxFd1Vt0AVUoFA22BPUCA1vps7qZzQEAx75molNqrlNp7fXWyyiItLQ2tNb6+vqxatco8tV8IZ1OzZk1iYmI4cOAA7u7uVk9MKal87MCBA4usqFjZREVF8dZbbwEVn8ydogSuUsoL+BqYrLVOzL9Nm2YeFTn7SGu9UGsdpbWOur46WWWRnp5Oy5YtufHGG/Hz8ytUElMIZ9StWzdiY2O5dOkSd999N2FhYXTq1Mk8fd7a8rGLFy/mscce4+rVqzRs2NBc5yU5OZkGDRqQmZnJ8ePH6du3L+3ataNbt25FTsBLSEjgtttuo3Xr1owfP578Exc/++wzOnToQEREBA899BDZ2dmAqfzss88+S3h4OJ06deL8+fMAfPXVV+aZmt27dwdMCbx///6cPHmS9957j9dff52IiAi2bdtGo0aNzEOPExMTCyznOX/+PIMGDSI8PJzw8HB27txp/lzy5H/oR8+ePZk8eTJRUVG8+OKLZfpsysKiZK6UcsOUyJdqrb/JXX1eKRWYuz0QcNrZNrVr1+bQoUOMGjUKf39/mTgk7KNnT1i82PQ6M9O0/NlnpuWUFNNy7hRzrl41LedWAyQ+3rS8dq1p+dw5q06dlZXFhg0bCA0NZfbs2bRt25b9+/fz73//m1GjRgGYy8fGxMSwbdu2AhUW88rHDh8+nJ
iYmALT0mvVqkVERARbtmwB4Ntvv+X222/Hzc2NiRMn8vbbbxMdHc28efN45JFHCsX23HPP0bVrVw4ePMigQYM4ffo0YJr5uHz5cnbs2EFMTAyurq7myT3Jycl06tSJffv20b17dz744AMA5s6dy/fff8++fftYs2ZNgfMEBwfz8MMPM2XKFGJiYujWrRs9e/Zk3bp1ACxbtozBgwfj5uZW4H1PPPEEPXr0YN++ffz666+0bt261M87IyODvXv3Mnv27DJ9NmVR6tBEZSoVtgg4rLWen2/TGuBB4OXc/1fbNTIHqVevnrTMhdNKTU0lIiICMLXMx40bR8eOHc0PWLjllltISEggMTHR6vKx+Q0fPpzly5fTq1cvli1bxiOPPEJSUhI7d+5k6NCh5v3S09MLvXfr1q3mErb9+vUzl5jduHEj0dHRtG/f3nwtec8ZcHd3p3///oCpBO4PP/wAmErgjh49mmHDhplL5JZk/PjxvPrqq9x99918/PHH5l8K+f300098+umngOkeRK1atUotjZ3/l11ZPpuysGSceRfgAeB3pVRema9nMCXxL5VS44BTwDC7RlaB9u/fzyOPPMIbb7xBixYt+OuvvxwdkqgK8pewdXMruOzhUXC5Vq2Cy35+BZdvuMGiU+b1mVvC2vKx+Q0cOJBnnnmGS5cuER0dzS233EJycjK1a9e2uRqg1poHH3yQl156qdA2Nzc3cwna/CVw33vvPfbs2cO6deto166duSpkcbp06cLJkyfZvHkz2dnZBbpOSpK/fDCUXAK3PD4bS1gymmW71lpprcO01hG5/9ZrrRO01r211iFa61u11pdKO1ZldeHCBXbs2EFaWhoLFiwo9OeaEM6sW7du5u6KzZs34+fnh4+Pj03lY/N4eXnRvn17nnzySfr374+rqys+Pj40atSIr776CjAl53379hV6b/4SuBs2bDC3env37s2KFSvM3ZyXLl3i1KlTJV7b8ePH6dixI3PnzsXf379QQ6yoaxg1ahT33XdfsVUTe/fuzbvvvguYHsRx9epVAgICuHDhAgkJCaSnp/Ptt98WG1NZPpuykOn8YH7OoLXF9IVwBnPmzCE6OpqwsDBmzJjBJ5+YRhTbUj42v6LKvC5dupRFixYRHh5O69atWb26cO/r7Nmz2bp1K61bt+abb77hpptuAqBVq1a88MIL3HbbbYSFhdGnTx/zs0aL89RTTxEaGkqbNm24+eabCQ8PL7B9wIABrFy50nwDFGDkyJFcvnyZESNGFHnMN998k02bNhEaGkq7du04dOgQbm5uzJo1iw4dOtCnT59SJxba+tmUhZTAxfQh33///Rw9epQTJ04wZ84cvvnmG5nSL6wiJXCdw4oVK1i9ejVLlixxdChFkhK4ZZDXMvfy8iIlJYXdu3dz7tw5SeZCVDGPP/44GzZsKDB+vqqQZI7pga1RUVF4e3ub757LiBYhqp78j22raqTPHBg2bBi//PILXl5e+Pn5AZLMhW0qsttSVD1l+f6RZH4db29v4J+uFyEsVaNGDRISEiShC5torUlISLB4eOj1pJsF093+bdu2sXHjRry9venQoQN169Z1dFjCyQQFBREXFyd/1Qmb1ahRw6rJW/lJMgdOnDjBiRMnAFPLfM+ePQ6OSDgjNzc3GjVq5OgwhEFJNwumug9Sy1wI4cwkmVP4wRTdu3dn7ty5DoxICCGsI8mcwsn8zz//5OTJk44LSAghrCR95kBkZCS1atUyL3t6epKcnOzAiIQQwjqSzCk8kUCSuRDC2Ug3SxEkmQshnI0kc6BZs2YFaih36dKFyMhIB0YkhBDWMXw3S05ODrGxsQWKzRdVHF8IISozw7fMU1NT0VrLOHMhhFMzfDLPX/42z+zZsy1+nJQQQlQGhk/meTc68z9lKCUlhePHjzsqJCGEsJrhk7m7uztDhw6lSZMm5nWenp6kpaWRnZ3twMiEEMJyhr8BGhQUxJdffllgXV6XS0pKirkkrh
BCVGaGb5kXJa/LRcaaCyGcheGT+erVq6lduzYHDx40r2vevDnDhw/H1dXVgZEJIYTlDN/NkpiYyNWrVws83eOWW27hlltucWBUQghhHcO3zPOGJuYfzSKEEM5GknkR48z37NmDj48PP/30k6PCEkIIqxg+maekpABQs2ZN8zo3NzeuXbvGtWvXHBWWEEJYxfDJPCIignHjxhW42SmjWYQQzsbwN0Dvuusu7rrrrgLrJJkLIZyN4VvmWutC6ySZCyGcjeGT+aRJk6hfv36BdV5eXowdO5ZWrVo5KCohhLCO4btZ0tPTqVat4Mfg5ubGokWLHBSREEJYz/At84yMDNzd3Qut11pLoS0hhNMwfDJPT0+nevXqhdYHBwczceJEB0QkhBDWM3wyL65lXr16dbkBKoRwGobvMx84cCCJiYmF1nt6ekoyF0I4DcMn8/Hjxxe5XpK5EMKZGL6bJTk5mfT09ELrJZkLIZyJ4VvmPXv2pF69eqxbt67A+mHDhkltFiGE0yg1mSulPgL6Axe01m1y180BJgAXc3d7Rmu9vryCLE/p6elF3gAdN26cA6IRQgjbWNLNshjoW8T617XWEbn/nDKRg2k0S1FDEzMyMrh8+bIDIhJCCOuVmsy11luBSxUQi0MU1zJ/5plnCAoKckBEQghhvbLcAH1MKbVfKfWRUqpOcTsppSYqpfYqpfZevHixuN0cpriWuaenJykpKeTk5DggKiGEsI6tyfxdoAkQAZwFXituR631Qq11lNY6yt/f38bTlZ8pU6YwYMCAQuvzKiempqZWdEhCCGE1m0azaK3P571WSn0AfGu3iCrYv/71ryLX5y+DK88HFUJUdja1zJVSgfkWBwEH7BNOxTtz5oz5OaD5SU1zIYQzKTWZK6W+AHYBzZVScUqpccCrSqnflVL7gV7AlHKOs1xorQkKCuI///lPoW3t2rXj+eefx8fHxwGRCSGEdUrtZtFajyhidZUo9p2VlQVQ5GiW0NBQQkNDKzokIYSwiaGn8+dN4y9unPnp06dJSUmp6LCEEMJqhk7mGRkZQNEt8/3799OwYUM2btxY0WEJIYTVDJ3M81rmRSVzuQEqhHAmhk7mXl5evPbaa9x8882FtkkyF0I4E0NXTfT29mbq1KlFbstL5kUNWxRCiMrG0C3ztLQ0/vjjjyJb39IyF0I4E0Mn84MHD9KiRYsib3JWr16d+fPn06dPHwdEJoQQ1jF0N0veaJaihiYqpZgyxSnnQgkhDMjQLfOShiYC/Pnnnxw/frwiQxJCCJsYumVe0tBEMD06zs/Pjw0bNlRkWEIIYTVpmVN0NwtAnTp15GlDQginYOhkHhoayvvvv09wcHCR2yWZCyGchaG7WRo2bMjEiROL3S7JXAjhLAzdMr9w4QLR0dHm7pbr5SVzrXUFRyaEENYxdDJftWoVUVFRxMfHF7l96NChfPLJJ5LMhRCVnqG7WUobzRIZGUlkZGRFhiSEEDYxdMu8tNEsV65cYcuWLVy9erUiwxJCCKsZOpmX1jKPjo6mZ8+e7Nu3ryLDEkIIqxk6mZc2A7ROnToAMqJFCFHpGbrP/J577qFZs2YopYrcLslcCOEsDJ3MS3tosyRzIYSzMHQ3y5EjR9ixY0ex2318fFBKSTIXQlR6hm6Zz58/n7Vr13L27Nkit7u4uLB69WqaN29ewZEJIYR1DJ3MMzIyih2WmGfAgAEVFI0QQtjO0N0s6enpxY5kybNr1y62bNlSQREJIYRtDN8yLy2Zz5o1i6SkJHbt2lVBUQkhhPUM3zIvrZtFKicKIZyBoVvmc+bMITU1tcR9JJkLIZyBoZN5VFRUqfvUqVOHK1euoLUudnKREEI4mqG7WTZv3syePXtK3KdOnTpkZGSU2oIXQghHMnTLfNq0aQQGBvLtt98Wu899991Hjx49Sr1RKoQQjmToZG7JDdAGDRrQoEGDCopICCFsY+huFkuGJl68eJGPP/6YuLi4CopKCCGsZ+hkbk
nL/PTp04wdO5bo6OgKikoIIaxn6GRuSctcKicKIZyBofvMV61aZU7WxZFkLoRwBoZO5h07dix1n1q1akkZXCFEpWfobpYlS5aU+nxPFxcXatWqJclcCFGpGbZlrrVm1KhRzJ49m/Dw8BL33bFjB/7+/hUUmRBCWK/UlrlS6iOl1AWl1IF86+oqpX5QSh3L/b/kjudKqLSHOefXqlUrSeZCiErNkm6WxUDf69bNADZqrUOAjbnLTiUvmZc2NBFg69atzJ8/v7xDEkIIm5WazLXWW4FL162+C/gk9/UnwN12jqvcpaenA5a1zL///nuefvppMjMzyzssIYSwia03QAO01nkPzjwHBBS3o1JqolJqr1Jq78WLF208nf1Z0zJv2rQp2dnZnDp1qrzDEkIIm5R5NIvWWgO6hO0LtdZRWuuoytTv7OfnR0xMDIMHDy5135CQEABiY2PLOywhhLCJraNZziulArXWZ5VSgcAFewZVEdzd3UsdxZKnadOmgCRzIUTlZWvLfA3wYO7rB4HV9gmn4sTHx/POO+/w559/lrpvQEAAnp6eFu0rhBCOUGrLXCn1BdAT8FNKxQGzgZeBL5VS44BTwLDyDLI8nDx5kkcffZS1a9fSqFGjEvdVSnHy5El8fX0rKDohhLBOqclcaz2imE297RxLhbJmnDmY+tiFEKKyMux0/ryhiZaMZgHYtGkTY8eOJTs7uzzDEkIImxg2mVvbMj9x4gQff/wxf/31V3mGJYQQNjFsMrdm0hD8M6Ll2LFj5RaTEELYyrDJ/NZbbyU2NpbWrVtbtL8MTxRCVGaGrZro4eFBkyZNLN4/MDCQmjVrSjIXQlRKhm2Zx8TE8Morr5CYmGjR/i4uLrRq1YqUlJRyjkwIIaxn2Jb57t27mTFjBqNGjcLHx8ei9/zyyy8opco5MiGEsJ5hW+bWFNrKI4lcCFFZGT6ZWzqaBWDz5s3ceuutnDt3rrzCEkIImxg2mVs7aQggLS2NjRs3yk1QIUSlY9hkntcyr1bN8tsGjRs3BkwTiIQQojIxbDKfPn06586ds6ofvGHDhiilJJkLISodw45m8fDwwMPDw6r3VK9enaCgIEnmQohKx7At81WrVvHyyy9b/b5u3bpRt27dcohICCFsp0xPfasYUVFReu/evRV2vpJMmDCB9evXc+bMGUeHIoQQJVJKRWuto0rax7At84yMDKuGJQohRGVm2GSenp5u1bDEPJs2baJ58+YcPny4HKISQgjbGDaZp6Sk4OnpafX7atasydGjRzl+/Hg5RCWEELYxdDK3djQLyFhzIUTlZNihid9//7154pA1/P398fT0lGQuhKhUDNsyd3V1pWbNmla/TylF48aNJZkLISoVw7bMZ82aRfPmzRk5cqTV7x04cKBUUBRCVCqGHWdev359+vXrxwcffODoUIQQokQyzrwEtt4AzaO1piJ/EQohREkMncxtGZoIsG7dOry8vDhw4ICdoxJCCNsYMplnZmaSmZlpc8vcx8eHlJQUzp49a+fIhBDCNoZM5mlpadSoUcPmZF6/fn0ASeZCiErDkKNZvL29SU1NtbnPOzAwEJBkLoSoPAzZMs9j6/BCDw8PfHx8+Pvvv+0ckRBC2MaQyfzUqVOMGjWKX3/91eZjTJo0iU6dOtkxKiGEsJ0hu1nOnTvHkiVLGDFihM3HsOXBFkIIUV4M2TJPSUkBKPM488TERHuFJIQQZSLJ3EbTp08nICBAJg4JISoFQybz5ORkoGzJPCAggLS0NK5evWqvsIQQwmaGTOYAvr6+eHl52fx+GZ4ohKhMDJnMhw0bRnx8PA0bNrT5GHkTh2R4ohCiMjBkMrcHaZkLISoTQybzpUuXMnjw4DLdvAwKCmLmzJm0bt3ajpEJIYRtDDnOfP/+/axfv75MD5jw9PTk+eeft2NUQghhuzIlc6XUSeAakA1klVY8vbIoS/nb/C5dukRycjINGjSwQ1RCCGE7e7TMe2mt4+1wnApT1g
dT5Ln77rtxcXFh8+bNZQ9KCCHKwJB95lYn8/T0f15/9x1MnQoZGQQGBnJeRrMIISqBsiZzDfxPKRWtlJpY1A5KqYlKqb1Kqb0XL14s4+nso27dujRp0sSynd95B/z8IG9y0L59sHgxuLkRGBjImJMnoWNHyMwsr3CFEKJUZU3mXbXWkcAdwKNKqe7X76C1Xqi1jtJaR/n7+5fxdPaxYMEC1q9fX/wOW7bAmTOm1z16wIQJkJZmWp4+HS5dAqUIDAzkQGYmGe3agZubafvu3ZCVVb4XIIQQ1ylTMtdan8n9/wKwEuhgj6Ac6uxZuP12eO0103Lr1jB/PgQEFNq1fv36LAFOTZliWnHmDPTsaUr4QghRgWy+AaqU8gRctNbXcl/fBsy1W2TlaOzYsTRs2JDZs2f/szIhAXx9ITAQNmwAC2qVd+7cmffeew9fX1/Tivr14fPPoUsX03J2Nri6lsMVCCFEQWUZzRIArMwdq10N+Fxr/Z1doipn27dvJy2v2wRg2za4805Yu9bUsu7Vy6LjNG3alKZNm/6zQikYPNj0OicHhgyBFi3gpZfsF7wQQhTB5mSutT4BhNsxlgpTaDRLRATcey+0amX1sQ4dOkROTg5t2rQpuCEnx9RSz532L4QQ5cmQM0CTk5PxqFkTvvjC1Hr29oYPPrDpWMOHD6dx48asXr264IZq1WDBAsgrGfDHH3DjjVCGSo1CCFEcw44zb335Mtx3H3z6aZmO1bRpU44dO1b8DkpBcrKp+2bChDKdSwghimO4lnlOTg5hYWHorl3hwQehd+8yHS8kJIT169eTnZ2Na3E3Oz094b//hbCwMp1LCCGKY7hk7rJkCb8sWWK6MWkHTZs2JSMjg7i4uJLro99zzz+vV66E/v3/GZsuhBBlZKxulitXTGPAX3nFbocMCQkBIDY21rI3/PyzacTLwoV2i0EIIYyVzGvX5u9Vq+j8229s2LDBLoeMjIxk3bp1tG3b1rI3dOgA334LDz9sl/MLIQQYJZmfPQsffQTAJS8vdu/bR1JSkl0OXatWLe68807q1q1r+Zv69TNNJrpyBUoqKyCEEBYyRjJ/80144gmIiyMlJQXALvXM82zfvp21a9da/8ZnnoGhQ6GSFCATQjgvYyTzF5+Fw4sAABIISURBVF+EnTshKMiczO1RzzzPa6+9xnRb6rG89BL88ANUkgJkQgjnVbWT+eefm7oyXF3NwwLLI5mHhIRw/PhxsrOzrXtjrVpw882m1zt3QmKi3WISQhhL1U3mp07BmDHw6qsFVvv4+NCjR49/imPZQUhIiHl4ok3OnTONd585024xCSGMpeqOM2/YELZvLzRRp2vXrnZ/zFtesa3Y2NiSx5oX54Yb4MsvoXuhcvBCCGGRqtcyP3PG9HAJgPbtoXr1cj9l69atqVatGr/88ovtBxkwwNTtkp0Ne/faLzghhCFUvWT+9NMwaFCx/c8LFy6kefPmJCcn2+2U9erVY8+ePbbdBL3ec89B165w8mTZjyWEMIyq183yzjtw4AD4+BS5+dy5cxw9epTqdm6xR0ZGAnD+/Hn8/PyKr9NSmiefhJAQCA62X3BCiCqv6rTMv//e1EVRq9Y/T/opQkpKCu7u7lSrZv/fY0ePHqVZs2a89dZbth/E1xceeMD0+uRJsNPkJiFE1VY1kvlvv0Hfvqb64aVISUmx64Sh/EJCQujZsyfTpk1jxYoVZTvYtWumR9c99ph9ghNCVGlVo5ulbVv4+mvTNPlSFHrKkB0ppfjiiy/o06cPI0eOxNfXl14WPoKuEG9v07DKzp3tG6QQokpy7mQeG2t6+EOTJv88e7MUoaGh5D63tFx4eHiwdu1aOnbsyFNPPcXesoxMGTXqn9d//gmNGpU9QCFEleS8yVxruP9+U3fE/v2mWZ4WePLJJ8s5MKhbty7Tp0/n4MGD5OTk4OJSxt6sDz80dbfs3m16XqkQQlzHeZO5UqZHvl2+bHEir0jjx4+338EGDY
K4OLj+odFCCJHL+W6A5uSYilMBNGsGHTta9fY77riDe++9txwCKywrK4sjR46U/UC+vjBnjukh0YmJppK+QgiRj/Ml88WL4bbbTFP1bXD27Flzsa3yNmXKFDp06EBmZqZ9Dqi16XFzAweafqkJIUQu50vmDzwAn31W4ljykpw5c4b69evbOaii9e7dm2vXrrHdxl88hSgFs2aZZomWtR9eCFGlOE9G+OwzUxeDmxuMHGlKbFZKTEwkPj6exo0bl0OAhd166624u7uzbt06ex4U7rzT9HrXLrBjWQIhhPNyjmR+7JipnO0bb5TpMH/++SdAhSVzLy8vevTowbfffovW2r4Hv3DBlNifftq+xxVCOCXnSOYhIbBtm+kxa2VQo0YNxo4dS2hoqJ0CK92gQYP4448/OHDggH0PXK8eLFsGzz9v3+MKIZySsnuLsQRRUVG6TJNonFBCQgIxMTH06NGjXOrBAKaboc8/D5MmmZK8EKJKUUpFa62jStrHOVrmdnLt2jX7d3eUwtfXl969e5dfIgc4dAheecVU0kAIYUiGSuZDhgyha9euFX7ec+fO8dRTT9m/qyVPmzZw8CA8/LBpOTW1fM4jhKi0DJXMT5w4QVBQUIWft1q1arz++ussXbq0/E7SqJFphM/ff0Pz5rBkSfmdSwhR6TjvdH4rZWdnc/LkSYYMGVJg/fuLFnPh0tVyP39wk6a8v/ADqtfyt/3BFRZwT0ujn68/22MOcvHcm+V2HiGqqnp1a/HQuNGODsNqhknmcXFxZGVlFRqWeOHSVdr1HlTu50+jOv9+ehLbt+9k8px5ZS++dZ3s7GzzL4nT/UZwE3AT0Hjxu1xtGUpCx4rvXhLCGUVvXOnoEGximG6WEydOABU3xvx6XXrfyciHprJx3des/nyRXY6ZeOUyn703nyfv78+EQT0KbXdJS+PGdV8TuHG9Xc4nhKi8DNMyDwoKYvbs2bRu3dphMYyY8AS16/rSs+9dAKSlplKjZk2rj3PlUjzffPYB6778lPS0VNpEdiSyU3eys7LI0Tm889JMHnzsaWrX9WPHp2vN7/M6cRS/3ds5NfQBtJub3a5LCOF4hknmISEhzJkzx6ExKKW4c8j9AGRmZjD5gf60CIvkwUefpo6vf4nvPf/3X7i4uOJ/Q33+OBDDyiUL6X7bAIaPe4ybGjcz73fyyAG2fLeaA7/u4fkFS7jhxpvM2+p/t5qGyz8lbsAQsuyYzLXWZGVl4ubmbrdjCiGsY5huluPHj5OQkODoMMyys7Jp3/UWNq1byYRBPflq8bukp6UV2Cfh4nmWffgWjw6/nbEDurJm2ccAdOjWm4XfbOapF98qkMgBmrRowwvvLiXx6mX+NWYwmzesIjs7G4Cjk/7Fjs++JcvbB4CIZx4naNWyMl/LmVMnGNajDU+PG8KhfcaaFCZEZWGYZD5ixAhGjBjh6DDMatSsybjJz/LOl/+jTdsOLH77ZR4a3Muc0D9689+MG9iNz96bj6eXNxOmzaLPXcMAUws/sEHDYo/dKjyK/yxaQa06vvxn5pMsnDeH3Ddy2T+AP48dJmbjBpKOHeHc/l9NE6myswn831qqXbNsZM/Fc3+z8rMPAHBzd6ffkAe4cO5v5jw5htMnjtr+wQghbGKIbpbMzExiY2MZNmyYo0Mp5MaGjZnz5sfs+2Unyz/6L+7VqwOmB1v0uuNuho15tMTEXZybGjfj7S82sGPjBhoENwHg2OHfmXx/f/M+zwKcOMqrA4bSIy2Vtv/vMaLnLeR8r9txT7iI518nudI6HJ2v+0RrzaYNK3nvldlkZ2fRpfcdBNRvwPip/8eAe0czbfQgZj0+mtcWr8TXP6BMn40QwnJlSuZKqb7Am4Ar8KHW+mW7RGVHWmseeeQRLl++TN++fR0dTrHC299MePubzcsTpv5fmR887eLiQrc+/czLKUnXGPnQFIKCm1Av8EZ8atflzKkTtG7bnvicHF59YCKp6enEf/M5zTas4v
Ff9/D/Rj1Etyefoebe3WyYMo4PcrK5kpZKq7B2TJ37OvUC/5mEFVC/AXPe/JgZE4dz7NB+fHv0KVP8WmuuXk4g8eplbmzQCNfyLIkgKrXka4kkxJ8nMKhhud2b0VqX68Pey5vNPx1KKVdgAdAHiAN+UUqt0Vofsldw9vDqq6/y4Ycf8swzz3D33Xc7OhyLlcc31fW/MADqNwgGICs7m3n/+5aLSxYCEOTpzbEmzXBpaBrKGbB9Iy+kpfDn8NEENm7GY+fO0ODhEWxaux1cXLjhx3V4HzsCk6bx/jebaHzxPG47t7C5Rg08vLxp7ulFNSA194ZstcSrqJwcMmvXMXXzpKZw+VICZy+ep/5NwdTx9WfP1h95fqrpWao1Pb1oHRFFm8hO9O5/D3X9pKBYeSqphpFSCq11kfsopczbc4p4GpZSChcXF7TWZGdlFdru4uqKi4sLVy7Fs/3H9fy2exuHYn4h8eplAF5euJzQdp04c+oE8RfOcVPjELx8almd4DMzM0i4cI7Tx49x9NA+Yn7eTnZWFq9/ugaA4cOHk56eTteuXenatSvNmjXD29sbt0o8CqwsTZ0OQKzW+gSAUmoZcBdQqZJ5SkoK9957L89LqdgSVXNz44NVmzny+2/UC7yReoFBBX6hnHzyGf4e9RBj6voBcO2n7ziblmp+4lGdfdHU2/IDxyZNw9c/gIb/fRWf3VvpcuUyWVmZLAWigFau1bjvocm8euQA1Y8fxS/uFDnZ2awBWgIPAOOnzOTfu7cRdfUyF/41Bw8vb0Yt+A/J0XvoumMznXvexp0zHuXM5UtEnjkNwPasTM6iGObmxifrd3HnY6M4lJlJr7hTAPySkU6McmFi9ep8sfE3eo/sx65q1Rj4l2n7wfQ0vnNx4akaNVmx9SC9+t3Mem8fRp421cD/Mz2NJa6uvFLHlyXf/8KtvcL43P8GHoo7CUB8ejqvurryUf0GLPryf9zauy0LAgL5V9xpvLTmdFYms1xcWd20Oe+/+zm9BnThJb96zP07jhu05lB2FlNcXNkV2pa3X3yT7sNuY3qtOrx54RwhWvNzTjbjXVyI7dCV16f+HzePvpsJHp58eimeCK3ZrDUjlOJyr9v5z6iH6PDI/dzj6srapGt01Zp1QD/Avd89vNhvMO3+NZGemRnsyMzkDmA50B24cegDzOnUnYj/m0x4SjKHgKHAIiASiHhwEs82bUbrfz9L09QUTgNjMP15HgLcNmkaT9euS/M3XuSG1BQuAY8DLwL1gfumzWZyViZN3n8dr7RUMoH/l/vPB3hy1qs8evYMQZ8vYmRyEgH1g3g/4Ab6ZmYw9+m5NG8TQYvXX6D9yi+ol5wEwFtAX6CVmztfbfmdyNfmwo/rCEkz1ShamJlJmNZ08fBkxdaDhD/7BIm7t9LqiukXxJfAVA9P/jv2MQCGLf6QYYlXGFCzJqtXr2Y9sAf4eOhQvvzyS+jdG7y9YdUq0w9Ht24QGAhffmla7tgRmjaFvPIdbdvCgAEwd651P5RWsrkErlJqCNBXaz0+d/kBoKPW+rHr9psITMxdbA78YXu45cIPiHd0EBXAKNcJxrlWo1wnyLU21FqXOH653DshtdYLgYXlfR5bKaX2llYnuCowynWCca7VKNcJcq2WKMvQxDNAg3zLQbnrhBBCVLCyJPNfgBClVCOllDtwL7DGPmEJIYSwhs3dLFrrLKXUY8D3mIYmfqS1Pmi3yCpOpe0CsjOjXCcY51qNcp0g11qqCn0GqBBCiPJhmOn8QghRlUkyF0KIKsAQyVwp1Vcp9YdSKlYpNaOI7dWVUstzt+9RSgVXfJT2YcG1TlVKHVJK7VdKbVRKWV/4pZIo7Vrz7XePUkorpZxyaJsl16mUGpb7dT2olPq8omO0Fwu+f29SSm1SSv2W+z18pyPiLCul1EdKqQtKqSKf8q5M3sr9HPYrpSJLPWjetNyq+g/TzdnjQGPAHdgHtLpun0eA93Jf3wssd3Tc5XitvQ
CP3NeTqvK15u7nDWwFdgNRjo67nL6mIcBvQJ3c5XqOjrscr3UhMCn3dSvgpKPjtvFau2OaVHugmO13AhsABXQC9pR2TCO0zM1lB7TWGUBe2YH87gI+yX29AuitnLPiTqnXqrXepLVOyV3cjWl+gDOy5OsK8DzwCpBWxDZnYMl1TgAWaK0vA2itL1RwjPZiybVqTDP/AWoBf1dgfHajtd4KXCphl7uAT7XJbqC2UiqwpGMaIZnfCPyVbzkud12R+2its4CrgG+FRGdfllxrfuMw/fZ3RqVea+6fpg201usqMjA7s+Rr2gxoppTaoZTanVvN1BlZcq1zgPuVUnHAekylX6oia3+WjVHPXBSmlLofU+2rwk+CrgKUUi7AfGC0g0OpCNUwdbX0xPSX1lalVKjW+opDoyofI4DFWuvXlFKdgSVKqTZa68IlGg3GCC1zS8oOmPdRSlXD9Odb5XnGnOUsKrGglLoV07MpBmqt0ysoNnsr7Vq9gTbAZqXUSUz9jmuc8CaoJV/TOGCN1jpTa/0ncBRTcnc2llzrOEyFDtFa7wJqYCpMVdVYXS7FCMnckrIDa4AHc18PAX7SuXchnEyp16qUagu8jymRO2vfKpRyrVrrq1prP611sNY6GNP9gYFaa2d7SKkl37+rMLXKUUr5Yep2OVGRQdqJJdd6GugNoJRqiSmZX6zQKCvGGmBU7qiWTsBVrfXZEt/h6Lu6FXTn+E5MrZXjwLO56+Zi+uEG0zfEV0As8DPQ2NExl+O1/gicB2Jy/61xdMzlda3X7bsZJxzNYuHXVGHqUjoE/A7c6+iYy/FaWwE7MI10iQFuc3TMNl7nF8BZIBPTX1bjgIeBh/N9TRfkfg6/W/K9K9P5hRCiCjBCN4sQQlR5ksyFEKIKkGQuhBBVgCRzIYSoAiSZCyFEFSDJXBiKUipbKRWjlDqglFqrlKqduz5YKZWaW43vsFLqZ6XUaAeHK4TFJJkLo0nVWkdordtgKnT0aL5tx7XWbbXWLTFNWJmslBrjkCiFsJIkc2FkuyimeJHW+gQwFXiiQiMSwkaSzIUhKaVcMU0Lv366eH6/Ai0qJiIhykaSuTCamkqpGOAcEAD8UMK+zljTXhiUJHNhNKla6wigIaZk/WgJ+7YFDldIVEKUkSRzYUja9LSlJ4BpuWWPC8h9Duw84O2KjUwI20ihLWEoSqkkrbVXvuW1mOpjb8PUCj+CqYrmNeAdrfViR8QphLUkmQshRBUg3SxCCFEFSDIXQogqQJK5EEJUAZLMhRCiCpBkLoQQVYAkcyGEqAIkmQshRBXw/wEset8M5yfkwQAAAABJRU5ErkJggg==\n",
            "text/plain": [
              "<Figure size 432x288 with 1 Axes>"
            ]
          },
          "metadata": {
            "tags": [],
            "needs_background": "light"
          }
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "24Ljv5_jW_6A",
        "colab_type": "text"
      },
      "source": [
        "##多元线性回归"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "fDAGpG75MfzJ",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "import sys\n",
        "sys.path.append('/content/drive/My Drive/Colab Notebooks/Machine-Learning-From-Scratch-master')"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "v3zkfwukLLbF",
        "colab_type": "code",
        "colab": {}
      },
      "source": [
        "X = all_data.drop(['RD','Core Lithology'],axis=1)\n",
        "y = all_data['RD'].values"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "AL1CozV4ILLL",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 593
        },
        "outputId": "c33740a4-8991-42e7-83b7-0c82659c5ea4"
      },
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt\n",
        "from sklearn.datasets import make_regression\n",
        "\n",
        "from utils import train_test_split\n",
        "from utils import mean_squared_error, Plot\n",
        "\n",
        "# L1正则化\n",
        "class l1_regularization():\n",
        "    def __init__(self, alpha):\n",
        "        self.alpha = alpha\n",
        "\n",
        "    # L1正则化的方差\n",
        "    def __call__(self, w):\n",
        "        loss = np.sum(np.fabs(w))\n",
        "        return self.alpha * loss\n",
        "\n",
        "    # L1正则化的梯度\n",
        "    def grad(self, w):\n",
        "        return self.alpha * np.sign(w)\n",
        "\n",
        "\n",
        "# L2正则化\n",
        "class l2_regularization():\n",
        "    def __init__(self, alpha):\n",
        "        self.alpha = alpha\n",
        "\n",
        "    # L2正则化的方差\n",
        "    def __call__(self, w):\n",
        "        loss = w.T.dot(w)\n",
        "        return self.alpha * 0.5 * float(loss)\n",
        "\n",
        "    # L2正则化的梯度\n",
        "    def grad(self, w):\n",
        "        return self.alpha * w\n",
        "\n",
        "\n",
        "class LinearRegression():\n",
        "    \"\"\"\n",
        "    Parameters:\n",
        "    -----------\n",
        "    n_iterations: int\n",
        "        梯度下降的轮数\n",
        "    learning_rate: float\n",
        "        梯度下降学习率\n",
        "    regularization: l1_regularization or l2_regularization or None\n",
        "        正则化\n",
        "    gradient: Bool\n",
        "        是否采用梯度下降法或正规方程法。\n",
        "        若使用了正则化，暂只支持梯度下降\n",
        "    \"\"\"\n",
        "\n",
        "    def __init__(self, n_iterations=3000, learning_rate=0.00005, regularization=None, gradient=True):\n",
        "        self.n_iterations = n_iterations\n",
        "        self.learning_rate = learning_rate\n",
        "        self.gradient = gradient\n",
        "        if regularization is None:\n",
        "            self.regularization = lambda x: 0\n",
        "            self.regularization.grad = lambda x: 0\n",
        "        else:\n",
        "            self.regularization = regularization\n",
        "\n",
        "    def initialize_weights(self, n_features):\n",
        "        # 初始化参数\n",
        "        limit = np.sqrt(1 / n_features)\n",
        "        w = np.random.uniform(-limit, limit, (n_features, 1))\n",
        "        b = 0\n",
        "        self.w = np.insert(w, 0, b, axis=0)\n",
        "\n",
        "    def fit(self, X, y):\n",
        "        # Coerce to ndarray: passing a DataFrame left X with its original 5\n",
        "        # columns at dot-time (saved traceback: shape mismatch (3345, 5) vs (6, 1))\n",
        "        X = np.asarray(X, dtype=float)\n",
        "        m_samples, n_features = X.shape\n",
        "        self.initialize_weights(n_features)\n",
        "        X = np.insert(X, 0, 1, axis=1)\n",
        "        y = np.reshape(y, (m_samples, 1))\n",
        "        self.training_errors = []\n",
        "        if self.gradient == True:\n",
        "            # 梯度下降\n",
        "            for i in range(self.n_iterations):\n",
        "                y_pred = X.dot(self.w)\n",
        "                loss = np.mean(0.5 * (y_pred - y) ** 2) + self.regularization(self.w) #计算loss\n",
        "                # print(loss)\n",
        "                self.training_errors.append(loss)\n",
        "                w_grad = X.T.dot(y_pred - y) + self.regularization.grad(self.w)  # (y_pred - y).T.dot(X)，计算梯度\n",
        "                self.w = self.w - self.learning_rate * w_grad #更新权值w\n",
        "        else:\n",
        "            # 正规方程\n",
        "            X = np.matrix(X)\n",
        "            y = np.matrix(y)\n",
        "            X_T_X = X.T.dot(X)\n",
        "            X_T_X_I_X_T = X_T_X.I.dot(X.T)\n",
        "            X_T_X_I_X_T_X_T_y = X_T_X_I_X_T.dot(y)\n",
        "            self.w = X_T_X_I_X_T_X_T_y\n",
        "\n",
        "    def predict(self, X):\n",
        "        X = np.insert(X, 0, 1, axis=1)\n",
        "        y_pred = X.dot(self.w)\n",
        "        return y_pred\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "\n",
        "def main():\n",
        "    X_train = training_data.drop(['RD','Core Lithology'],axis=1)\n",
        "    y_train = training_data['RD'].values\n",
        "    X_test = testing_data.drop(['RD','Core Lithology'],axis=1)\n",
        "    y_test = testing_data['RD'].values\n",
        "    n_samples, n_features = np.shape(X)\n",
        "\n",
        "    # 可自行设置模型参数，如正则化，梯度下降轮数学习率等\n",
        "    model = LinearRegression(n_iterations=3000, regularization=l2_regularization(alpha=0.5))\n",
        "\n",
        "    model.fit(X_train, y_train)\n",
        "\n",
        "    # Training error plot 画loss的图\n",
        "    n = len(model.training_errors)\n",
        "    training, = plt.plot(range(n), model.training_errors, label=\"Training Error\")\n",
        "    plt.legend(handles=[training])\n",
        "    plt.title(\"Error Plot\")\n",
        "    plt.ylabel('Mean Squared Error')\n",
        "    plt.xlabel('Iterations')\n",
        "    plt.show()\n",
        "\n",
        "    y_pred = model.predict(X_test)\n",
        "    y_pred = np.reshape(y_pred, y_test.shape)\n",
        "\n",
        "    mse = mean_squared_error(y_test, y_pred)\n",
        "    print(\"Mean squared error: %s\" % (mse))\n",
        "\n",
        "    # Color map\n",
        "    cmap = plt.get_cmap('viridis')\n",
        "\n",
        "    # Plot predicted vs. measured RD on the test set. The original\n",
        "    # day/temperature scatter (366 * X) was copied from a 1-D example\n",
        "    # and cannot work with a multi-feature matrix.\n",
        "    idx = range(len(y_test))\n",
        "    m1 = plt.scatter(idx, y_test, color=cmap(0.9), s=10)\n",
        "    m2 = plt.scatter(idx, y_pred, color=cmap(0.5), s=10)\n",
        "    plt.suptitle(\"Linear Regression\")\n",
        "    plt.title(\"MSE: %.2f\" % mse, fontsize=10)\n",
        "    plt.xlabel('Sample index')\n",
        "    plt.ylabel('RD')\n",
        "    plt.legend((m1, m2), (\"Measured\", \"Predicted\"), loc='lower right')\n",
        "    plt.show()\n",
        "\n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "    main()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "error",
          "ename": "ValueError",
          "evalue": "ignored",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-71-279586b4fbb9>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m    148\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    149\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__name__\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"__main__\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 150\u001b[0;31m     \u001b[0mmain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
            "\u001b[0;32m<ipython-input-71-279586b4fbb9>\u001b[0m in \u001b[0;36mmain\u001b[0;34m()\u001b[0m\n\u001b[1;32m    113\u001b[0m     \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mLinearRegression\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_iterations\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m3000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mregularization\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0ml2_regularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0malpha\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 115\u001b[0;31m     \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    116\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    117\u001b[0m     \u001b[0;31m# Training error plot 画loss的图\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-71-279586b4fbb9>\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y)\u001b[0m\n\u001b[1;32m     78\u001b[0m             \u001b[0;31m# 梯度下降\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     79\u001b[0m             \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_iterations\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 80\u001b[0;31m                 \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     81\u001b[0m                 \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.5\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0my_pred\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m**\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mregularization\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m#计算loss\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     82\u001b[0m                 \u001b[0;31m# print(loss)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mdot\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m   1133\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mlvals\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mrvals\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1134\u001b[0m                 raise ValueError(\n\u001b[0;32m-> 1135\u001b[0;31m                     \u001b[0;34mf\"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1136\u001b[0m                 )\n\u001b[1;32m   1137\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mValueError\u001b[0m: Dot product shape mismatch, (3345, 5) vs (6, 1)"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5q_C-T-Wm1yk",
        "colab_type": "text"
      },
      "source": [
        "##岭回归"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "LS8HkxxUgwMq",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 241
        },
        "outputId": "01508130-98f9-4db7-8d2b-48d76f311d17"
      },
      "source": [
        "from numpy import genfromtxt\n",
        "import numpy as np\n",
        "from sklearn import linear_model\n",
        "import matplotlib.pyplot as plt\n",
        "from mpl_toolkits.mplot3d import axes3d\n",
        "\n",
        "# X为特征，Y为结果\n",
        "X = X_train\n",
        "Y = y_train\n",
        "\n",
        "\n",
        "# 预测模型\n",
        "regression_equation = linear_model.Ridge(alpha=1.0)  # section is ridge regression, not plain OLS\n",
        "regression_equation.fit(X, Y)\n",
        "y_pred = regression_equation.predict(X_test)  # was model.predict: stale variable from the earlier failed cell (see saved NameError)\n",
        "y_pred = np.reshape(y_pred, y_test.shape)\n",
        "\n",
        "mse = mean_squared_error(y_test, y_pred)\n",
        "print(\"Mean squared error: %s\" % (mse))\n",
        "\n",
        "# print('系数为')\n",
        "# print(regression_equation.coef_)\n",
        "# print('截面或截距为')\n",
        "# print(regression_equation.intercept_)\n",
        "\n",
        "# 为三维图准备数据，x,y,z为三维坐标值\n",
        "xs = []\n",
        "ys = []\n",
        "zs = []\n",
        "zs_predict = []\n",
        "\n",
        "for x in np.asarray(X_test):  # iterate rows; iterating a DataFrame yields column labels\n",
        "    xs.append(x[0])\n",
        "    ys.append(x[1])\n",
        "    x = np.array(x).reshape(1, -1)\n",
        "    predict = regression_equation.predict(x)\n",
        "    zs_predict.append(predict[0])\n",
        "\n",
        "for z in y_test:\n",
        "    zs.append(z)\n",
        "\n",
        "\n",
        "# 训练集坐标数据\n",
        "xs_train = []\n",
        "ys_train = []\n",
        "zs_train = Y\n",
        "for x in X:\n",
        "    xs_train.append(x[0])\n",
        "    ys_train.append(x[1])\n",
        "\n",
        "# 绘制3D散点图\n",
        "fig = plt.figure()\n",
        "ax = fig.add_subplot(111, projection='3d')\n",
        "ax.scatter(xs, ys, zs, s=20, c='blue', depthshade=True)\n",
        "ax.scatter(xs, ys, zs_predict, s=20, c='red', depthshade=True)\n",
        "ax.scatter(xs_train, ys_train, zs_train, s=35, c='black', depthshade=True, marker='+')\n",
        "\n",
        "ax.set_xlabel('Traffic load')\n",
        "ax.set_ylabel('Forwarding load threshold')\n",
        "ax.set_zlabel('Hot spot proportional threshold')\n",
        "\n",
        "plt.show()\n"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "error",
          "ename": "NameError",
          "evalue": "ignored",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-13-9da387c92ec3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     34\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mz\u001b[0m \u001b[0;32min\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     35\u001b[0m     \u001b[0mzs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mz\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 36\u001b[0;31m     \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     37\u001b[0m     \u001b[0my_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_test\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     38\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mNameError\u001b[0m: name 'model' is not defined"
          ]
        }
      ]
    }
  ]
}